corpus <- read_feather(here("data/ldp_sequences.feather"))
corpus <- corpus %>%
  rename(this_act = meaning,
         this_frame = utt_parse) %>%
  mutate(age_chunk = case_when(age_months <= 24 ~ 24,
                               age_months > 24 & age_months <= 36 ~ 36,
                               age_months > 36 & age_months <= 48 ~ 48,
                               age_months > 48 ~ 58)) %>%
  mutate(age_split = case_when(age_months <= 36 ~ 36,
                               age_months > 36 ~ 58))
act_sequences <- corpus %>%
  group_by(age_split, speaker, this_act, next_act) %>%
  count()

act_freqs <- act_sequences %>%
  group_by(this_act) %>%
  count()

frequent_acts <- act_freqs %>%
  filter(n >= 10)
  
ggplot(act_sequences %>% filter(speaker == "ADU", n > 2),
       aes(y = n, axis1 = this_act, axis2 = next_act, group = age_split)) +
  geom_alluvium(aes(fill = this_act)) +
  geom_stratum(color = "grey") + 
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("this act", "next act")) + 
  theme(legend.position = "none") +
  facet_wrap(~age_split, scales = "free")
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

Act transitions where the first speaker (left) is a parent. Faceted by broad age bin.

ggplot(act_sequences %>% filter(speaker == "CHI", n > 2),
       aes(y = n, axis1 = this_act, axis2 = next_act, group = age_split)) +
  geom_alluvium(aes(fill = this_act)) +
  geom_stratum(color = "grey") + 
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("this act", "next act")) + 
  theme(legend.position = "none") +
  facet_wrap(~age_split, scales = "free")
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

Act transitions where the first speaker (left) is a child. Faceted by broad age bin.

common_acts <- c("ask yes/no question", "declarative statement", "propose action")

ggplot(act_sequences %>% 
         filter(speaker == "ADU", this_act %in% common_acts,
                n > 50),
       aes(y = n, axis1 = this_act, axis2 = next_act, group = age_split)) +
  geom_alluvium(aes(fill = this_act)) +
  geom_stratum(color = "grey") + 
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("this act", "next act")) + 
  theme(legend.position = "none") +
  facet_wrap(~age_split, scales = "free")
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

Transitions from just three very common acts (yes/no question, declarative statement, propose action) where the first speaker is a parent.

ggplot(act_sequences %>% 
         filter(speaker == "CHI", this_act %in% common_acts,
                n > 50),
       aes(y = n, axis1 = this_act, axis2 = next_act, group = age_split)) +
  geom_alluvium(aes(fill = this_act)) +
  geom_stratum(color = "grey") + 
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("this act", "next act")) + 
  theme(legend.position = "none") +
  facet_wrap(~age_split, scales = "free")
## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

## Warning in to_lodes_form(data = data, axes = axis_ind, discern =
## params$discern): Some strata appear at multiple axes.

Transitions from just three very common acts (yes/no question, declarative statement, propose action) where the first speaker is a child.

sequence_probs <- act_sequences %>%
  group_by(this_act) %>%
  count(next_act) %>%
  mutate(prob = n/sum(n)) %>%
  ungroup()

sequence_probs %>% 
  ggplot(aes(x=next_act, y=this_act, fill=log(prob))) + 
  geom_tile() +
  viridis::scale_fill_viridis() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Heat map of all communicative act transitions.

age_sequence_probs <- act_sequences %>%
  filter(this_act %in% frequent_acts$this_act) %>%
  group_by(age_split, this_act) %>%
  count(next_act) %>%
  mutate(prob = n/sum(n)) %>%
  ungroup()

age_sequence_probs %>% 
  ggplot(aes(x=next_act, y=this_act, fill=log(prob))) + 
  geom_tile() +
  viridis::scale_fill_viridis() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  facet_wrap(~age_split)

Heat map of top 40 communicative act transitions by age split.

avg_sequences <- corpus %>%
  group_by(this_act, next_act, next_next_act) %>%
  count() %>%
  ungroup() %>%
  mutate(avg_prob = n/sum(n))

act_sequences <- corpus %>%
  group_by(age_chunk, this_act, next_act, next_next_act) %>%
  count() %>%
  ungroup() %>%
  group_by(age_chunk) %>%
  mutate(prob = n/sum(n)) %>%
  left_join(avg_sequences %>% select(-n), by = c("this_act", "next_act", "next_next_act"))%>%
  mutate(distinct = prob - avg_prob)

distinct_acts <- act_sequences %>%
  group_by(age_chunk) %>%
  arrange(desc(distinct)) %>%
  slice(1:10) %>%
  ungroup() 

distinct_acts %>% datatable()

Table of most distinctive act sequences in each age group, where distinctiveness is the diff between its probability in an age group and its probability overall.

avg_transitions <- corpus %>%
  group_by(this_act, next_act) %>%
  count() %>%
  ungroup() %>%
  mutate(avg_prob = n/sum(n))

act_transitions_distinct <- corpus %>%
  group_by(age_chunk, this_act) %>%
  count(next_act) %>%
  ungroup() %>%
  group_by(age_chunk) %>%
  mutate(prob = n/sum(n)) %>%
  left_join(avg_transitions %>% select(-n), by = c("this_act", "next_act"))%>%
  mutate(distinct = prob - avg_prob)

act_transitions_distinct %>% 
  ggplot(aes(x=next_act, y=this_act, fill=distinct)) + 
  geom_tile() +
  viridis::scale_fill_viridis() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
  facet_wrap(~age_chunk)